/*
EU-SILC cross-sectional analysis of hours worked and self-reported health
for self-employed with employees, by gender and child age.
Produces results for Figures 3 and A4.

Uses:
$silc_data/Intermediate_Data/0423_R_vars.dta
$silc_data/Intermediate_Data/0423_P_vars.dta
$silc_data/Intermediate_Data/0423_H_vars.dta

Creates:
$silc_data/Final_Data/0423_EUSILC_cleaned.dta
$silc_out/{f,m}_hrs.dta
$silc_out/{f,m}_hrs_average.dta
$silc_out/{f,m}_goodhealth.dta
$silc_out/{f,m}_goodhealth_average.dta
$silc_out/lincom_avg_0_3.dta
$exhibits/summary_regression_sample.tex
*/

*** Set directory
* All paths are built from $root, which is expected to be set before this
* do-file runs (it is not defined anywhere in this file).
clear
global silc_data "$root/data/silc"
global silc_out  "$root/estimates/silc"
global exhibits  "$root/exhibits"

*** User-written packages
* Install from SSC only when the command cannot be found. An unconditional
* -ssc install- needs network access on every run, and without -replace- it
* stops with an error when an installed copy differs from the one on SSC.
capture which parmest
if _rc ssc install parmest, replace
capture which esttab
if _rc ssc install estout, replace


*** Assemble the person-level file
* Start from the personal register (R-file) ...
use "$silc_data/Intermediate_Data/0423_R_vars.dta", clear

* ... add personal data (P-file), keeping only person-years found in both files ...
merge 1:1 year persid using "$silc_data/Intermediate_Data/0423_P_vars.dta", ///
	keep(match) nogenerate

* ... and add household data (H-file), again keeping matched records only.
merge m:1 year hhid using "$silc_data/Intermediate_Data/0423_H_vars.dta", ///
	keep(match) nogenerate

* Quadratic age term, used as a control in every regression below
gen age2=age*age

* child: category built from haschild and child_age1 (the variable reported
* as "Age of Youngest Child" in the summary table at the end of this file).
*   99      no child
*   1       child aged 0 or 1
*   2-10    child of exactly that age
*   11      child aged 11-18
* Observations with haschild==1 and child_age1 missing or above 18 stay missing.
gen child=.
label define child_l ///
	99 "No child" ///
	1 "Child age 0-1" ///
	2 "Child age 2" ///
	3 "Child age 3" ///
	4 "Child age 4" ///
	5 "Child age 5" ///
	6 "Child age 6" ///
	7 "Child age 7" ///
	8 "Child age 8" ///
	9 "Child age 9" ///
	10 "Child age 10" ///
	11 "Child age 11-18"
* Attach the value label to the variable. (-label var- would instead set the
* variable label to the literal text "child_l" and leave the codes unlabelled.)
label values child child_l
replace child=99 if haschild==0
replace child=1 if haschild==1 & (child_age1==0 | child_age1==1)
forvalues a = 2/10 {
	replace child=`a' if haschild==1 & child_age1==`a'
}
replace child=11 if haschild==1 & child_age1>=11 & child_age1<=18

* Numeric industry code from the string variable nace.
* NOTE(review): code 1 is recoded to 99 on the assumption that the first
* category in sort order is the "missing" category - confirm against the data.
encode nace, gen(nace_num)
replace nace_num=99 if nace_num==1 // Missing

* Log hours. NOTE(review): l_hrs is not used again in this file, and the
* regressions below use hrs_work rather than hrs - confirm this is intended.
gen l_hrs=log(hrs)


save "$silc_data/Final_Data/0423_EUSILC_cleaned.dta", replace

********************************************************************************
*** Self-Employed with employees
********************************************************************************

*** Regressions on the cleaned analysis file
use "$silc_data/Final_Data/0423_EUSILC_cleaned.dta", clear

* Women, hours worked, single has-child indicator.
* Sample: employment==1, age<=60, child category 11 excluded, good_health
* not system-missing, survey years from 2008 onwards.
regress hrs_work i.haschild age age2 i.marital i.educ ib99.nace_num i.year ///
	if employment==1 & age<=60 & female==1 & child!=11 ///
	& good_health!=. & year>=2008, vce(robust)
matrix b = e(b)
matrix V = e(V)
local n_coef = colsof(b)

* Write one row per column of e(b): label, point estimate, robust s.e.
* NOTE(review): the labels are hard-coded and assumed to follow the column
* order of e(b) for this sample - confirm if the sample or controls change.
local coef_labels "0.haschild 1.haschild age age2 1.marital 2.marital 3.marital 4.marital 5.marital 1.educ 3.educ 4.educ 5.educ 2.nace_num 3.nace_num 5.nace_num 6.nace_num 7.nace_num 8.nace_num 10.nace_num 12.nace_num 13.nace_num 14.nace_num 99.nace_num 2008 2009 2010 2011 2012 2013 2021 2022 2023 _cons"
preserve
clear
set obs `n_coef'
gen variable = word("`coef_labels'", _n)
gen coef = b[1, _n]
gen se = sqrt(V[_n, _n])
save "$silc_out/f_hrs_average.dta", replace
restore

* Men, hours worked, single has-child indicator.
* Sample: employment==1, age<=60, child category 11 excluded, good_health
* not system-missing, survey years from 2008 onwards.
regress hrs_work i.haschild age age2 i.marital i.educ ib99.nace_num i.year ///
	if employment==1 & age<=60 & female==0 & child!=11 ///
	& good_health!=. & year>=2008, vce(robust)
matrix b = e(b)
matrix list b
matrix V = e(V)
local n_coef = colsof(b)

* Write one row per column of e(b): label, point estimate, robust s.e.
* NOTE(review): the labels are hard-coded and assumed to follow the column
* order of e(b) for this sample - compare with the -matrix list- output above.
local coef_labels "0.haschild 1.haschild age age2 1.marital 2.marital 3.marital 4.marital 5.marital 1.educ 2.educ 3.educ 4.educ 5.educ 2.nace_num 3.nace_num 4.nace_num 5.nace_num 6.nace_num 7.nace_num 8.nace_num 9.nace_num 10.nace_num 12.nace_num 13.nace_num 14.nace_num 99.nace_num 2008 2009 2010 2011 2012 2013 2021 2022 2023 _cons"
preserve
clear
set obs `n_coef'
gen variable = word("`coef_labels'", _n)
gen coef = b[1, _n]
gen se = sqrt(V[_n, _n])
save "$silc_out/m_hrs_average.dta", replace
restore

* Women, good health, single has-child indicator.
* Sample: employment==1, age<=60, child category 11 excluded, hrs_work
* not system-missing, survey years from 2008 onwards.
regress good_health i.haschild age age2 i.marital i.educ ib99.nace_num i.year ///
	if employment==1 & age<=60 & female==1 & child!=11 ///
	& hrs_work!=. & year>=2008, vce(robust)
matrix b = e(b)
matrix list b
matrix V = e(V)
local n_coef = colsof(b)

* Write one row per column of e(b): label, point estimate, robust s.e.
* NOTE(review): the labels are hard-coded and assumed to follow the column
* order of e(b) for this sample - compare with the -matrix list- output above.
local coef_labels "0.haschild 1.haschild age age2 1.marital 2.marital 3.marital 4.marital 5.marital 1.educ 3.educ 4.educ 5.educ 2.nace_num 3.nace_num 5.nace_num 6.nace_num 7.nace_num 8.nace_num 10.nace_num 12.nace_num 13.nace_num 14.nace_num 99.nace_num 2008 2009 2010 2011 2012 2013 2021 2022 2023 _cons"
preserve
clear
set obs `n_coef'
gen variable = word("`coef_labels'", _n)
gen coef = b[1, _n]
gen se = sqrt(V[_n, _n])
save "$silc_out/f_goodhealth_average.dta", replace
restore

* Men, good health, single has-child indicator.
* Sample: employment==1, age<=60, child category 11 excluded, hrs_work
* not system-missing, survey years from 2008 onwards.
regress good_health i.haschild age age2 i.marital i.educ ib99.nace_num i.year ///
	if employment==1 & age<=60 & female==0 & child!=11 ///
	& hrs_work!=. & year>=2008, vce(robust)
matrix b = e(b)
matrix list b
matrix V = e(V)
local n_coef = colsof(b)

* Write one row per column of e(b): label, point estimate, robust s.e.
* NOTE(review): the labels are hard-coded and assumed to follow the column
* order of e(b) for this sample - compare with the -matrix list- output above.
local coef_labels "0.haschild 1.haschild age age2 1.marital 2.marital 3.marital 4.marital 5.marital 1.educ 2.educ 3.educ 4.educ 5.educ 2.nace_num 3.nace_num 4.nace_num 5.nace_num 6.nace_num 7.nace_num 8.nace_num 9.nace_num 10.nace_num 12.nace_num 13.nace_num 14.nace_num 99.nace_num 2008 2009 2010 2011 2012 2013 2021 2022 2023 _cons"
preserve
clear
set obs `n_coef'
gen variable = word("`coef_labels'", _n)
gen coef = b[1, _n]
gen se = sqrt(V[_n, _n])
save "$silc_out/m_goodhealth_average.dta", replace
restore

* Women, hours worked, by child-age category (base category: 99 = no child).
* Sample: employment==1, age<=60, child category 11 excluded, good_health
* not system-missing, survey years from 2008 onwards.
regress hrs_work ib99.child age age2 i.marital i.educ ib99.nace_num i.year ///
	if employment==1 & age<=60 & female==1 & child!=11 ///
	& good_health!=. & year>=2008, vce(robust)

* Mean of the first three child-category coefficients; the locals are read
* again when lincom_avg_0_3.dta is written further down.
lincom (_b[1.child] + _b[2.child] + _b[3.child]) / 3
local f_hrs_b = r(estimate)
local f_hrs_se = r(se)

matrix b = e(b)
matrix list b
matrix V = e(V)
local n_coef = colsof(b)

* Write one row per column of e(b): label, point estimate, robust s.e.
* NOTE(review): the labels are hard-coded and assumed to follow the column
* order of e(b); the 11th label reads "0.child" although the model's base
* category is 99 - confirm what downstream code expects before renaming.
local coef_labels "1.child 2.child 3.child 4.child 5.child 6.child 7.child 8.child 9.child 10.child 0.child age age2 1.marital 2.marital 3.marital 4.marital 5.marital 1.educ 3.educ 4.educ 5.educ 2.nace_num 3.nace_num 5.nace_num 6.nace_num 7.nace_num 8.nace_num 10.nace_num 12.nace_num 13.nace_num 14.nace_num 99.nace_num 2008 2009 2010 2011 2012 2013 2021 2022 2023 _cons"
preserve
clear
set obs `n_coef'
gen variable = word("`coef_labels'", _n)
gen coef = b[1, _n]
gen se = sqrt(V[_n, _n])
save "$silc_out/f_hrs.dta", replace
restore

* Men, hours worked, by child-age category (base category: 99 = no child).
* Sample: employment==1, age<=60, child category 11 excluded, good_health
* not system-missing, survey years from 2008 onwards.
regress hrs_work ib99.child age age2 i.marital i.educ ib99.nace_num i.year ///
	if employment==1 & age<=60 & female==0 & child!=11 ///
	& good_health!=. & year>=2008, vce(robust)

* Mean of the first three child-category coefficients; the locals are read
* again when lincom_avg_0_3.dta is written further down.
lincom (_b[1.child] + _b[2.child] + _b[3.child]) / 3
local m_hrs_b = r(estimate)
local m_hrs_se = r(se)

matrix b = e(b)
matrix list b
matrix V = e(V)
local n_coef = colsof(b)

* Write one row per column of e(b): label, point estimate, robust s.e.
* NOTE(review): the labels are hard-coded and assumed to follow the column
* order of e(b); the 11th label reads "0.child" although the model's base
* category is 99 - confirm what downstream code expects before renaming.
local coef_labels "1.child 2.child 3.child 4.child 5.child 6.child 7.child 8.child 9.child 10.child 0.child age age2 1.marital 2.marital 3.marital 4.marital 5.marital 1.educ 2.educ 3.educ 4.educ 5.educ 2.nace_num 3.nace_num 4.nace_num 5.nace_num 6.nace_num 7.nace_num 8.nace_num 9.nace_num 10.nace_num 12.nace_num 13.nace_num 14.nace_num 99.nace_num 2008 2009 2010 2011 2012 2013 2021 2022 2023 _cons"
preserve
clear
set obs `n_coef'
gen variable = word("`coef_labels'", _n)
gen coef = b[1, _n]
gen se = sqrt(V[_n, _n])
save "$silc_out/m_hrs.dta", replace
restore

* Women, good health, by child-age category (base category: 99 = no child).
* Sample: employment==1, age<=60, child category 11 excluded, hrs_work
* not system-missing, survey years from 2008 onwards.
regress good_health ib99.child age age2 i.marital i.educ ib99.nace_num i.year ///
	if employment==1 & age<=60 & female==1 & child!=11 ///
	& hrs_work!=. & year>=2008, vce(robust)

* Mean of the first three child-category coefficients; the locals are read
* again when lincom_avg_0_3.dta is written further down.
lincom (_b[1.child] + _b[2.child] + _b[3.child]) / 3
local f_gh_b = r(estimate)
local f_gh_se = r(se)

matrix b = e(b)
matrix V = e(V)
local n_coef = colsof(b)

* Write one row per column of e(b): label, point estimate, robust s.e.
* NOTE(review): the labels are hard-coded and assumed to follow the column
* order of e(b); the 11th label reads "0.child" although the model's base
* category is 99 - confirm what downstream code expects before renaming.
local coef_labels "1.child 2.child 3.child 4.child 5.child 6.child 7.child 8.child 9.child 10.child 0.child age age2 1.marital 2.marital 3.marital 4.marital 5.marital 1.educ 3.educ 4.educ 5.educ 2.nace_num 3.nace_num 5.nace_num 6.nace_num 7.nace_num 8.nace_num 10.nace_num 12.nace_num 13.nace_num 14.nace_num 99.nace_num 2008 2009 2010 2011 2012 2013 2021 2022 2023 _cons"
preserve
clear
set obs `n_coef'
gen variable = word("`coef_labels'", _n)
gen coef = b[1, _n]
gen se = sqrt(V[_n, _n])
save "$silc_out/f_goodhealth.dta", replace
restore

* Men, good health, by child-age category (base category: 99 = no child).
* Sample: employment==1, age<=60, child category 11 excluded, hrs_work
* not system-missing, survey years from 2008 onwards.
regress good_health ib99.child age age2 i.marital i.educ ib99.nace_num i.year ///
	if employment==1 & age<=60 & female==0 & child!=11 ///
	& hrs_work!=. & year>=2008, vce(robust)

* Mean of the first three child-category coefficients; the locals are read
* again when lincom_avg_0_3.dta is written further down.
lincom (_b[1.child] + _b[2.child] + _b[3.child]) / 3
local m_gh_b = r(estimate)
local m_gh_se = r(se)

matrix b = e(b)
matrix list b
matrix V = e(V)
local n_coef = colsof(b)

* Write one row per column of e(b): label, point estimate, robust s.e.
* NOTE(review): the labels are hard-coded and assumed to follow the column
* order of e(b); the 11th label reads "0.child" although the model's base
* category is 99 - confirm what downstream code expects before renaming.
local coef_labels "1.child 2.child 3.child 4.child 5.child 6.child 7.child 8.child 9.child 10.child 0.child age age2 1.marital 2.marital 3.marital 4.marital 5.marital 1.educ 2.educ 3.educ 4.educ 5.educ 2.nace_num 3.nace_num 4.nace_num 5.nace_num 6.nace_num 7.nace_num 8.nace_num 9.nace_num 10.nace_num 12.nace_num 13.nace_num 14.nace_num 99.nace_num 2008 2009 2010 2011 2012 2013 2021 2022 2023 _cons"
preserve
clear
set obs `n_coef'
gen variable = word("`coef_labels'", _n)
gen coef = b[1, _n]
gen se = sqrt(V[_n, _n])
save "$silc_out/m_goodhealth.dta", replace
restore

* Collect the four lincom results (mean of the child-category 1-3
* coefficients, i.e. child ages 0-3) into one small dataset.
* Row order: women/hours, men/hours, women/health, men/health.
preserve
clear
set obs 4
gen sex = cond(mod(_n, 2) == 1, "Women", "Men")
gen outcome = cond(_n <= 2, "hrs_work", "good_health")
gen coef = .
gen se = .

* Each prefix names a pair of locals (<prefix>_b, <prefix>_se) stored right
* after the corresponding -lincom- call above.
local row = 0
foreach prefix in f_hrs m_hrs f_gh m_gh {
	local ++row
	replace coef = ``prefix'_b' in `row'
	replace se = ``prefix'_se' in `row'
}
save "$silc_out/lincom_avg_0_3.dta", replace
restore


*** Summary Table

* Create sample markers for summary table: re-run the two hours-worked
* regressions by child-age category and flag their estimation samples.
regr hrs_work ib99.child age age2 i.marital i.educ ib99.nace_num i.year ///
if employment==1 & age<=60 & female==1 & child!=11 & good_health!=. & year>=2008, robust
gen sample_female=e(sample)
regr hrs_work ib99.child age age2 i.marital i.educ ib99.nace_num i.year ///
if employment==1 & age<=60 & female==0 & child!=11 & good_health!=. & year>=2008, robust
gen sample_male=e(sample)

* Indicator for employment==1 (not used in the table below)
gen has_employees=employment==1
replace has_employees=0 if employment==2

eststo clear
clear mata

* Variables to summarise, in the same order as the row names assigned below.
* The "Has Child" row uses the 0/1 indicator haschild: the categorical child
* variable is coded 1-10 and 99, so its mean and sd are not meaningful.
local varlist age married college haschild num_kids child_age1 good_health hrs_work

* One row per variable plus a final row for the number of observations
local n_vars : word count `varlist'
local n_rows = `n_vars' + 1
matrix SUMTAB = J(`n_rows', 4, .)
local count = 1

foreach x of local varlist {

    * Women (only mean and sd are needed, so -detail- is not requested)
    quietly summ `x' if sample_female == 1
    matrix SUMTAB[`count', 1] = round(r(mean), .01)
    matrix SUMTAB[`count', 2] = round(r(sd), .01)

    * Men
    quietly summ `x' if sample_male == 1
    matrix SUMTAB[`count', 3] = round(r(mean), .01)
    matrix SUMTAB[`count', 4] = round(r(sd), .01)

    local count = `count' + 1
}

* Add observation counts in the last row (the SD columns stay missing)
count if sample_female == 1
matrix SUMTAB[`n_rows', 1] = r(N)
matrix SUMTAB[`n_rows', 2] = .
count if sample_male == 1
matrix SUMTAB[`n_rows', 3] = r(N)
matrix SUMTAB[`n_rows', 4] = .

* Label rows (must list one name per entry of varlist, then "Observations")
matrix rownames SUMTAB =  ///
"Age" ///
"Married" ///
"College Educ or higher" ///
"Has Child" ///
"Number of Children" ///
"Age of Youngest Child" ///
"Good Health" ///
"Hours of work" ///
"Observations"

matrix colnames SUMTAB = "Women Mean" "Women SD" "Men Mean" "Men SD"

* Export LaTeX table
esttab matrix(SUMTAB) using "$exhibits/summary_regression_sample.tex", replace ///
    nomtitles ///
    prehead("\begin{tabular}{lcccc}" "\hline\hline") ///
    posthead("\hline") ///
    postfoot("\hline\hline" "\end{tabular}")
